# library(AIgeomorphologist)
devtools::load_all()
#> ℹ Loading AIgeomorphologist
#> Registered S3 method overwritten by 'geojsonsf':
#> method from
#> print.geojson geojson
library(sen2r)
#> Welcome to sen2r. To use the package from a GUI, launch
#> > sen2r()
#> Documentation: https://sen2r.ranghetti.info
#> Help and bug report: https://github.com/ranghetti/sen2r/issuesThe labelled points data are stored in SSCT_data. The function to_spatial() transform the data.frame into sp object.
labelled_points <- SSCT_data %>% to_spatial() %>% sf::st_as_sf()
one_random_point <- labelled_points %>% dplyr::sample_n(1)
one_random_point
#> Simple feature collection with 1 feature and 2 fields
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: -121.5061 ymin: 36.21968 xmax: -121.5061 ymax: 36.21968
#> CRS: +proj=longlat +datum=WGS84 +no_defs
#> SiteID ward geometry
#> 1 SCC_CL_2_31656 10 POINT (-121.5061 36.21968)First, we retrieve the list of the tile id for the S2 satellites over the spatial_extent of interest.
# add id_tiles to labelled_points
ind <- sapply(sf::st_intersects(labelled_points, sen2r::s2_tiles()), function(z) if (length(z)==0) NA_integer_ else z[1])
id_tiles <- sen2r::s2_tiles()[ind, ] %>% dplyr::pull(tile_id) %>% unique()
usethis::use_data(id_tiles, overwrite = TRUE)id_tiles
#> [1] "10TDM" "10TCM" "10TDL" "10TEL" "10TFM" "10TEM" "10TDK" "10SEJ" "10SDJ"
#> [10] "10TCL" "10SEH" "10SDH" "10TCK" "10TFK" "10TFL" "10TGL" "10TGM" "10SGJ"
#> [19] "10TEK" "10SFJ" "10SGH" "10TGK" "10SFH" "11SMT" "11SMS" "11SLT" "10SGD"
#> [28] "11SKU" "11SLU" "11SNS" "10SEG" "10SFE" "10SEF" "10SFF" "10SFG" "10SGE"
#> [37] "11SLB" "11SLA" "11SMU" "11SNT" "11SKC" "11SMV" "11SNV" "11SPU" "11SNU"
#> [46] "10SGG" "11SKB" "11SKA" "11SLV" "10SGF"Second, we retrieve the entire list of the sensed tiles for the period of interest. This command can take a few hours to run.
search_results <- s2_list(
tile = id_tiles,
time_interval = c(as.Date("2019-06-01"), as.Date("2020-10-01")),
server = "gcloud",
time_period = "full",
level = "L2A",
availability = "online"
)
usethis::use_data(search_results, overwrite = TRUE)search_results <- search_results %>% as.data.frame()
dim(search_results)
#> [1] 9442 12
colnames(search_results)
#> [1] "name" "url" "mission"
#> [4] "level" "id_tile" "id_orbit"
#> [7] "sensing_datetime" "ingestion_datetime" "clouds"
#> [10] "footprint" "uuid" "online"
head(search_results)
#> name
#> 1 S2B_MSIL2A_20190601T183929_N0212_R070_T10TGL_20190601T222206.SAFE
#> 2 S2B_MSIL2A_20190601T183929_N0212_R070_T10TGM_20190601T222206.SAFE
#> 3 S2B_MSIL2A_20190601T183929_N0212_R070_T10SGJ_20190601T222206.SAFE
#> 4 S2B_MSIL2A_20190601T183929_N0212_R070_T10SFJ_20190601T222206.SAFE
#> 5 S2B_MSIL2A_20190601T183929_N0212_R070_T10SGH_20190601T222206.SAFE
#> 6 S2B_MSIL2A_20190601T183929_N0212_R070_T10TGK_20190601T222206.SAFE
#> url
#> 1 gs://gcp-public-data-sentinel-2/L2/tiles/10/T/GL/S2B_MSIL2A_20190601T183929_N0212_R070_T10TGL_20190601T222206.SAFE/
#> 2 gs://gcp-public-data-sentinel-2/L2/tiles/10/T/GM/S2B_MSIL2A_20190601T183929_N0212_R070_T10TGM_20190601T222206.SAFE/
#> 3 gs://gcp-public-data-sentinel-2/L2/tiles/10/S/GJ/S2B_MSIL2A_20190601T183929_N0212_R070_T10SGJ_20190601T222206.SAFE/
#> 4 gs://gcp-public-data-sentinel-2/L2/tiles/10/S/FJ/S2B_MSIL2A_20190601T183929_N0212_R070_T10SFJ_20190601T222206.SAFE/
#> 5 gs://gcp-public-data-sentinel-2/L2/tiles/10/S/GH/S2B_MSIL2A_20190601T183929_N0212_R070_T10SGH_20190601T222206.SAFE/
#> 6 gs://gcp-public-data-sentinel-2/L2/tiles/10/T/GK/S2B_MSIL2A_20190601T183929_N0212_R070_T10TGK_20190601T222206.SAFE/
#> mission level id_tile id_orbit sensing_datetime ingestion_datetime
#> 1 2B 2A 10TGL 070 2019-06-01 18:39:29 <NA>
#> 2 2B 2A 10TGM 070 2019-06-01 18:39:29 <NA>
#> 3 2B 2A 10SGJ 070 2019-06-01 18:39:29 <NA>
#> 4 2B 2A 10SFJ 070 2019-06-01 18:39:29 <NA>
#> 5 2B 2A 10SGH 070 2019-06-01 18:39:29 <NA>
#> 6 2B 2A 10TGK 070 2019-06-01 18:39:29 <NA>
#> clouds
#> 1 25.182886
#> 2 59.057457
#> 3 13.727439
#> 4 6.254699
#> 5 13.039538
#> 6 13.173152
#> footprint
#> 1 POLYGON((-120.21088 40.527464112909335, -120.18938 40.59771807196067, -120.143906 40.74483797817248, -120.098236 40.89193972768891, -120.05307 41.03919338960453, -120.008575 41.18662490967887, -119.96184 41.33335054811086, -119.91577 41.48023008253367, -119.907196 41.5083620608322, -119.28943 41.49194363396281, -119.34445 40.50488583658978, -120.21088 40.527464112909335))
#> 2 POLYGON((-119.93434 41.421012232997306, -119.91577 41.48023008253367, -119.870865 41.62745286900263, -119.82495 41.77445123040062, -119.77861 41.921365826364166, -119.731964 42.068235973248576, -119.68565 42.21532441687911, -119.63928 42.36245497516326, -119.62691 42.401428368630995, -119.23686 42.39088066238852, -119.29443 41.40403474612011, -119.93434 41.421012232997306))
#> 3 POLYGON((-120.69324 38.92419035411651, -120.67732 38.9789127368281, -120.632034 39.1258089025245, -120.586655 39.27266815204012, -120.542816 39.41993885723559, -120.49855 39.56701196021211, -120.456085 39.71460393582964, -120.454025 39.72136180996339, -119.387634 39.69403311805837, -119.43793 38.70656329494251, -120.699356 38.73821864423846, -120.69324 38.92419035411651))
#> 4 POLYGON((-120.747986 38.738403429976536, -120.72026 38.83136414410842, -120.67732 38.9789127368281, -120.632034 39.1258089025245, -120.586655 39.27266815204012, -120.56523 39.34466576762448, -120.58624 38.73593643111066, -120.747986 38.738403429976536))
#> 5 POLYGON((-120.69653 38.82628170870513, -119.43355 38.79452724007538, -119.48164 37.80685703267152, -120.72763 37.83751218570364, -120.69653 38.82628170870513))
#> 6 POLYGON((-120.47946 39.633358287778705, -120.456085 39.71460393582964, -120.41147 39.861603967740805, -120.366745 40.00861927488247, -120.32405 40.156265176075856, -120.2796 40.30346029523268, -120.234436 40.45054114658088, -120.18938 40.59771807196067, -120.18408 40.61485723707699, -119.33966 40.59281342227294, -119.39224 39.60554763793313, -120.47946 39.633358287778705))
#> uuid online
#> 1 <NA> TRUE
#> 2 <NA> TRUE
#> 3 <NA> TRUE
#> 4 <NA> TRUE
#> 5 <NA> TRUE
#> 6 <NA> TRUEWe can now analyze these search results so that we only download the tiles we need. One paramount parameter is the amount of clouds identified in the imagery. To define the threshold we defined min_SAFE as the minimum number of sensed tiles across all tiles for a given cloud threshold and sum_SAFE as the total of sensed tiles for a given cloud threshold.
search_stats <- search_results %>% dplyr::group_by(id_tile) %>% dplyr::summarise(dplyr::across(dplyr::contains("clouds"), list(min = min, mean = mean))) %>% dplyr::summarise(clouds_min = min(clouds_min), clouds_mean = mean(clouds_mean))
clouds_stats <- data.frame(
clouds_threshold = seq(ceiling(search_stats$clouds_min), ceiling(search_stats$clouds_mean), by = 0.1)
) %>%
dplyr::mutate(
min_SAFE = sapply(clouds_threshold, function(clouds_threshold){
search_results %>%
dplyr::group_by(id_tile) %>%
dplyr::group_map(~ {dplyr::filter(., clouds <= clouds_threshold) %>% nrow()}) %>%
unlist() %>%
min()
}),
sum_SAFE = sapply(clouds_threshold, function(clouds_threshold){
search_results %>%
dplyr::group_by(id_tile) %>%
dplyr::group_map(~ {dplyr::filter(., clouds <= clouds_threshold) %>% nrow()}) %>%
unlist() %>%
sum()
})
)The graph below shows the trade-off between incorporating more cloud: the minimum number of sensed tiles goes up, but so does the total number of sensed tiles. Each tile is roughly 1 GB.
p <- ggplot2::ggplot(clouds_stats %>% reshape2::melt(id.vars = c("clouds_threshold")), ggplot2::aes(x = clouds_threshold, y = value)) +
ggplot2::geom_point() +
ggplot2::facet_wrap(. ~ variable, scales = "free") +
ggpubr::theme_pubclean()
plotly::ggplotly(p)From these two graphs, we select the following threshold:
sensing_limit <- 30
clouds_stats %>% dplyr::filter(min_SAFE > sensing_limit) %>% head(1)
#> clouds_threshold min_SAFE sum_SAFE
#> 1 5.1 31 4166
clouds_threshold <- clouds_stats %>% dplyr::filter(min_SAFE > sensing_limit) %>% head(1) %>% dplyr::pull(clouds_threshold)which is appropriate as the cloud percentage remains low, at least 30 tiles are sensed for any tile, and less than 3 TB of SAFE data have to be downloaded.
The selection of the product for downloading is then done by:
selected_search_results <- search_results %>% dplyr::filter(clouds <= clouds_threshold)
dim(selected_search_results)
#> [1] 4166 12Additionally, we can visualize the sensing dates with the following codes:
p <- ggplot2::ggplot(selected_search_results, ggplot2::aes(x = sensing_datetime, y = clouds, color = id_tile)) +
ggplot2::geom_point() +
ggpubr::theme_pubclean()
plotly::ggplotly(p)Visual inspection of the above graph tile by tile does not indicate that there is a significant skew towards a given period for any tile. Example of a tile with discontinuous record is 10TGK. The tile with the lowest average cloud cover is 10SEF.
The following graph shows the distribution of the number of sensed tiles:
count_df <- data.frame(count = selected_search_results %>%
dplyr::group_by(id_tile) %>%
dplyr::group_map(~ nrow(.)) %>%
unlist()
)
ggplot2::ggplot(count_df, ggplot2::aes(x = count)) +
ggplot2::geom_density() +
ggplot2::labs(x = "number of sensed tiles") +
ggpubr::theme_pubclean()Running s2_download() then allows to retrieve all the tile imagery provided that a safelist is passed as s2_prodlist.
class(search_results)
#> [1] "data.frame"
selected_search_results <- selected_search_results %>% as("safelist")
class(selected_search_results)
#> [1] "safelist" "character"s2_download(s2_prodlist = selected_search_results, outdir = "path/to/your/outdir")
- Level-0 is compressed raw data. The Level-0 product contains all the information required to generate the Level-1 (and upper) product levels.
- Level-1
- Level-1A is uncompressed raw data with spectral bands coarsely coregistered and ancillary data appended.
- Level-1B data is radiometrically corrected radiance data. The physical geometric model is refined using available ground control points and appended to the product, but not applied.
- Level-1C product provides orthorectified Top-Of-Atmosphere (TOA) reflectance, with sub-pixel multispectral registration. Cloud and land/water masks are included in the product.
- Level-2A product provides orthorectified Bottom-Of-Atmosphere (BOA) reflectance, with sub-pixel multispectral registration. A Scene Classification map (cloud, cloud shadows, vegetation, soils/deserts, water, snow, etc.) is included in the product.
The SAFE format has been designed to act as a common format for archiving and conveying data within ESA Earth Observation archiving facilities. The SAFE format wraps a folder containing image data in a binary data format and product metadata in XML. This flexibility allows the format to be scalable enough to represent all levels of SENTINEL products.
A SENTINEL-2 product refers to a directory folder that contains a collection of information (Figure 1). It includes:
- a manifest.safe file which holds the general product information in XML
- a preview image in JPEG2000 format
- subfolders for measurement datasets including image data (granules/tiles) in GML-JPEG2000 format
- subfolders for datastrip level information
- a subfolder with auxiliary data (e.g. International Earth Rotation & Reference Systems (IERS) bulletin)
- HTML previews
from S2 MSI data formats